library(twitteR)
## Warning: package 'twitteR' was built under R version 4.2.2
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
##
## lookup_statuses
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(RColorBrewer)
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.2
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.2.2
Extract tweets from Twitter using your developer credentials. Choose any keyword you want. Get 10,000 observations, excluding retweets.
# Twitter API credentials are read from environment variables (for example set
# in ~/.Renviron) so that secrets are never stored in the script itself.
# NOTE(review): the literal keys that used to be assigned here were exposed in
# source and should be revoked and regenerated.
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Sys.getenv() returns "" for an unset variable; stop early with a clear
# message instead of attempting authentication with blank credentials.
if (!all(nzchar(c(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_SECRET)))) {
  stop(
    "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, ",
    "TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET before running.",
    call. = FALSE
  )
}

setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"
# Search English-language tweets matching "Philippines" (the query string
# filters out retweets) between the given dates, requesting 10000 results.
trendTwts <- searchTwitter(
  "Philippines -filter:retweets",
  n = 10000,
  lang = "en",
  since = "2022-11-20",
  until = "2022-11-27",
  retryOnRateLimit = 120
)

# Convert the returned list into a data frame, display it, and save it to disk.
trendTwtsDF <- trendTwts %>% twListToDF()
trendTwtsDF
save(trendTwtsDF, file = "trendTwtsDF.Rdata")

# Preview the text of the first few tweets.
head(trendTwtsDF[["text"]])
## [1] "The Philippines is eyeing to work closely with the member states of the Association of Southeast Asian Nations in a… https://t.co/shvNXeKxYH"
## [2] "The Philippines is eyeing to work closely with the member states of the Association of Southeast Asian Nations in a… https://t.co/aweXcOaSgo"
## [3] "@EZPZ_Val Even I didn't won in your Giveaways , I am here your Solid Follower in the Philippines ❤️ Godbless soon I… https://t.co/DBY8AHsnm9"
## [4] "I paid 1$ to enter here at Mt. Palpalan Pagadian City Philippines - 1,882.5 feet https://t.co/v2K8tAcDjo"
## [5] "my aunt came back from the philippines with fresh mangoes 😋 https://t.co/WDPGtAC7v9"
## [6] "The Philippines and Australia have committed to strengthening their bilateral foreign policy and defense cooperatio… https://t.co/zwpSbLog5C"
# Count missing values in each column. vapply() pins the result to exactly one
# integer per column, whereas sapply()'s return type depends on its input.
vapply(trendTwtsDF, function(x) sum(is.na(x)), integer(1))
## text favorited favoriteCount replyToSN created
## 0 0 0 5966 0
## truncated replyToSID id replyToUID statusSource
## 0 6094 0 5966 0
## screenName retweetCount isRetweet retweeted longitude
## 0 0 0 0 9917
## latitude
## 9917
# Keep only the account name, tweet text, creation time and source columns,
# then save the reduced data frame to disk.
trendsDF <- select(trendTwtsDF, screenName, text, created, statusSource)
save(trendsDF, file = "trendsDF.Rdata")
Plot the time series of tweets by creation date, with a legend.
# Show the overall time span covered by the tweets. A whole-table summary needs
# no grouping; grouping by the constant 1 only added a dummy column named `1`.
trendsDF %>%
  summarise(max = max(created), min = min(created))

# Round each creation time to the nearest hour so tweets can be counted per
# hour. round() on a date-time returns POSIXlt, so convert back to POSIXct
# before storing it as a data frame column.
trendsDF %<>%
  mutate(
    Created_At_Round = created %>%
      round(units = "hours") %>%
      as.POSIXct()
  )
# Earliest tweet creation time.
min(trendsDF[["created"]])
## [1] "2022-11-24 15:10:31 UTC"
# Latest tweet creation time.
max(trendsDF[["created"]])
## [1] "2022-11-26 23:59:16 UTC"
# Line chart of the number of tweets in each rounded hour.
# The colour aesthetic is mapped to a descriptive label so the legend shows
# that label, and scale_colour_manual() assigns the actual colour red.
# Writing aes(colour = "red") would treat "red" as a data value: the legend
# would read "red" and the line would get the default palette colour.
plt <- trendsDF %>%
  dplyr::count(Created_At_Round) %>%
  ggplot(mapping = aes(x = Created_At_Round, y = n)) +
  theme_light() +
  geom_line(aes(colour = "Tweets per hour")) +
  scale_colour_manual(name = "Series", values = c("Tweets per hour" = "red")) +
  xlab(label = "Date") +
  ylab(label = NULL) +
  ggtitle(label = "Number of Tweets per Hour")

# Render the ggplot object as an interactive plotly chart.
plt %>% ggplotly()
# NOTE(review): this repeats the range summary and hourly rounding that the
# script already performs before the time-series plot; confirm whether the
# repetition is intentional.
summarise(group_by(trendsDF, 1), max = max(created), min = min(created))
trendsDF <- trendsDF %>%
  mutate(Created_At_Round = as.POSIXct(round(created, units = "hours")))
Plot a graph (any graph you want) based on the type of device (found in Source) that the users use. Include a legend.
# Classify one tweet source string into a short device/app label.
#
# x: a single character string (the script passes elements of statusSource,
#    which contain the client name as the text of an HTML anchor).
# Returns one of "iphone", "ipad", "android", "Web", "windows phone",
# "dlvr.it", "ifttt", "facebook", or "others" when nothing matches.
encodeSource <- function(x) {
  # Ordered lookup: anchor-text fragment -> label. The first match wins, which
  # preserves the priority of the original if/else chain.
  source_labels <- c(
    ">Twitter for iPhone</a>" = "iphone",
    ">Twitter for iPad</a>" = "ipad",
    ">Twitter for Android</a>" = "android",
    ">Twitter Web Client</a>" = "Web",
    ">Twitter for Windows Phone</a>" = "windows phone",
    ">dlvr.it</a>" = "dlvr.it",
    ">IFTTT</a>" = "ifttt",
    ">Facebook</a>" = "facebook"
  )

  for (fragment in names(source_labels)) {
    # fixed = TRUE matches the fragment literally, so the "." in "dlvr.it" is
    # not treated as a regex wildcard.
    if (grepl(fragment, x, fixed = TRUE)) {
      return(source_labels[[fragment]])
    }
  }

  "others"
}
# Label every tweet with its source category. vapply() guarantees a character
# vector with one label per tweet, and USE.NAMES = FALSE keeps the raw
# statusSource strings from being attached to the new column as element names.
trendsDF$tweetSource <- vapply(
  trendsDF$statusSource,
  encodeSource,
  character(1),
  USE.NAMES = FALSE
)
# Tally the tweets in each source category, largest count first.
tweet_appSource <- trendsDF %>%
  group_by(tweetSource) %>%
  summarize(count = n()) %>%
  arrange(desc(count))
# Bar chart of tweet counts per source category, leaving out the catch-all
# "others" category. Bars are filled by category, and the legend is placed on
# the right.
deviceSource <- ggplot(
  data = trendsDF[trendsDF$tweetSource != "others", ],
  mapping = aes(x = tweetSource, fill = tweetSource)
) +
  geom_bar() +
  ggtitle("Tweets by Source") +
  ylab("Number of tweets") +
  theme(
    legend.position = "right",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Display the chart.
deviceSource
# Tally the tweets posted by each account, largest count first.
tweet_appScreen <- trendsDF %>%
  group_by(screenName) %>%
  summarize(count = n()) %>%
  arrange(desc(count))

# Build a tm corpus from the screen names.
namesCorpus <- trendsDF$screenName %>%
  VectorSource() %>%
  Corpus()

# Inspect the class of the screen name column.
class(trendsDF$screenName)
## [1] "character"
library(wordcloud2)
# Word cloud built from the per-account tally in tweet_appScreen.
wordcloud2(
  data = tweet_appScreen,
  size = 0.8,
  color = "random-dark",
  shape = "pentagon"
)